<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" type="text/css" href="insn.css"/><meta name="generator" content="iform.xsl"/><title>BFDOT (vector) -- A64</title></head><body><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><hr/><h2 class="instruction-section">BFDOT (vector)</h2>
      <p class="aml">BFloat16 floating-point dot product (vector). This instruction delimits the source vectors into pairs of BFloat16 elements.</p>
      <p class="aml">If FEAT_EBF16 is not implemented or <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.EBF is 0, this instruction:</p>
      <ul>
        <li>Performs an unfused sum-of-products of each pair of adjacent BFloat16 elements in the source vectors. The intermediate single-precision products are rounded before they are summed, and the intermediate sum is rounded before accumulation into the single-precision destination element that overlaps with the corresponding pair of BFloat16 elements in the source vectors.</li>
        <li>Uses the non-IEEE 754 Round-to-Odd rounding mode, which forces bit 0 of an inexact result to 1, and rounds an overflow to an appropriately signed Infinity.</li>
        <li>Flushes denormalized inputs and results to zero, as if <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.{FZ, FIZ} is {1, 1}.</li>
        <li>Disables alternative floating point behaviors, as if <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.AH is 0.</li>
      </ul>
      <p class="aml">If FEAT_EBF16 is implemented and <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.EBF is 1, then this instruction:</p>
      <ul>
        <li>Performs a fused sum-of-products of each pair of adjacent BFloat16 elements in the source vectors. The intermediate single-precision products are not rounded before they are summed, but the intermediate sum is rounded before accumulation into the single-precision destination element that overlaps with the corresponding pair of BFloat16 elements in the source vectors.</li>
        <li>Follows all other floating-point behaviors that apply to single-precision arithmetic, as governed by <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.RMode, <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.FZ, <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.AH, and <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.FIZ.</li>
      </ul>
      <p class="aml">Irrespective of FEAT_EBF16 and <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.EBF, this instruction:</p>
      <ul>
        <li>Does not modify the cumulative <a class="armarm-xref" title="Reference to Armv8 ARM section">FPSR</a> exception bits (IDC, IXC, UFC, OFC, DZC, and IOC).</li>
        <li>Disables trapped floating-point exceptions, as if the <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a> trap enable bits (IDE, IXE, UFE, OFE, DZE, and IOE) are all zero.</li>
        <li>Generates only the default NaN, as if <a class="armarm-xref" title="Reference to Armv8 ARM section">FPCR</a>.DN is 1.</li>
      </ul>
      <p class="aml"><a class="armarm-xref" title="Reference to Armv8 ARM section">ID_AA64ISAR1_EL1</a>.BF16 indicates whether this instruction is supported.</p>
    
    <h3 class="classheading"><a id="iclass_simd"/>Vector<span style="font-size:smaller;"><br/>(FEAT_BF16)
          </span></h3><p class="desc"/><div class="regdiagram-32"><table class="regdiagram"><thead><tr><td>31</td><td>30</td><td>29</td><td>28</td><td>27</td><td>26</td><td>25</td><td>24</td><td>23</td><td>22</td><td>21</td><td>20</td><td>19</td><td>18</td><td>17</td><td>16</td><td>15</td><td>14</td><td>13</td><td>12</td><td>11</td><td>10</td><td>9</td><td>8</td><td>7</td><td>6</td><td>5</td><td>4</td><td>3</td><td>2</td><td>1</td><td>0</td></tr></thead><tbody><tr class="firstrow"><td class="lr">0</td><td class="lr">Q</td><td class="lr">1</td><td class="l">0</td><td>1</td><td>1</td><td>1</td><td class="r">0</td><td class="l">0</td><td class="r">1</td><td class="lr">0</td><td colspan="5" class="lr">Rm</td><td class="lr">1</td><td class="l">1</td><td>1</td><td>1</td><td class="r">1</td><td class="lr">1</td><td colspan="5" class="lr">Rn</td><td colspan="5" class="lr">Rd</td></tr></tbody></table></div><div class="encoding"><h4 class="encoding"/><a id="BFDOT_asimdsame2_D"/><p class="asm-code">BFDOT  <a href="#sa_vd" title="SIMD&amp;FP destination register (field &quot;Rd&quot;)">&lt;Vd&gt;</a>.<a href="#sa_ta" title="Arrangement specifier (field &quot;Q&quot;) [2S,4S]">&lt;Ta&gt;</a>, <a href="#sa_vn" title="First SIMD&amp;FP source register (field &quot;Rn&quot;)">&lt;Vn&gt;</a>.<a href="#sa_tb" title="Arrangement specifier (field &quot;Q&quot;) [4H,8H]">&lt;Tb&gt;</a>, <a href="#sa_vm" title="Second SIMD&amp;FP source register (field &quot;Rm&quot;)">&lt;Vm&gt;</a>.<a href="#sa_tb" title="Arrangement specifier (field &quot;Q&quot;) [4H,8H]">&lt;Tb&gt;</a></p></div><p class="pseudocode">if !<a href="shared_pseudocode.html#impl-shared.HaveBF16Ext.0" title="function: boolean HaveBF16Ext()">HaveBF16Ext</a>() then UNDEFINED;
integer n = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Rn);
integer m = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Rm);
integer d = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Rd);
integer datasize = if Q == '1' then 128 else 64;
integer elements = datasize DIV 32;</p>
  <div class="encoding-notes"/><h3 class="explanations">Assembler Symbols</h3><div class="explanations"><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Vd&gt;</td><td><a id="sa_vd"/>
        
          <p class="aml">Is the name of the SIMD&amp;FP destination register, encoded in the "Rd" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Ta&gt;</td><td><a id="sa_ta"/>
        <p>Is an arrangement specifier, 
      encoded in
      <q>Q</q>:
        </p>
        <table class="valuetable">
          
            <thead>
              <tr>
                <th class="bitfield">Q</th>
                <th class="symbol">&lt;Ta&gt;</th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td class="bitfield">0</td>
                <td class="symbol">2S</td>
              </tr>
              <tr>
                <td class="bitfield">1</td>
                <td class="symbol">4S</td>
              </tr>
            </tbody>
          
        </table>
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Vn&gt;</td><td><a id="sa_vn"/>
        
          <p class="aml">Is the name of the first SIMD&amp;FP source register, encoded in the "Rn" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Tb&gt;</td><td><a id="sa_tb"/>
        <p>Is an arrangement specifier, 
      encoded in
      <q>Q</q>:
        </p>
        <table class="valuetable">
          
            <thead>
              <tr>
                <th class="bitfield">Q</th>
                <th class="symbol">&lt;Tb&gt;</th>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td class="bitfield">0</td>
                <td class="symbol">4H</td>
              </tr>
              <tr>
                <td class="bitfield">1</td>
                <td class="symbol">8H</td>
              </tr>
            </tbody>
          
        </table>
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Vm&gt;</td><td><a id="sa_vm"/>
        
          <p class="aml">Is the name of the second SIMD&amp;FP source register, encoded in the "Rm" field.</p>
        
      </td></tr></table></div><div class="syntax-notes"/>
    <div class="ps"><a id="execute"/><h3 class="pseudocode">Operation</h3>
      <p class="pseudocode"><a href="shared_pseudocode.html#impl-aarch64.CheckFPAdvSIMDEnabled64.0" title="function: CheckFPAdvSIMDEnabled64()">CheckFPAdvSIMDEnabled64</a>();
bits(datasize) operand1 = <a href="shared_pseudocode.html#impl-aarch64.V.read.2" title="accessor: bits(width) V[integer n, integer width]">V</a>[n, datasize];
bits(datasize) operand2 = <a href="shared_pseudocode.html#impl-aarch64.V.read.2" title="accessor: bits(width) V[integer n, integer width]">V</a>[m, datasize];
bits(datasize) operand3 = <a href="shared_pseudocode.html#impl-aarch64.V.read.2" title="accessor: bits(width) V[integer n, integer width]">V</a>[d, datasize];
bits(datasize) result;

for e = 0 to elements-1
    bits(16) elt1_a = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*e+0, 16];
    bits(16) elt1_b = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, 2*e+1, 16];
    bits(16) elt2_a = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*e+0, 16];
    bits(16) elt2_b = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, 2*e+1, 16];

    bits(32) sum = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, e, 32];
    sum = <a href="shared_pseudocode.html#impl-shared.BFDotAdd.6" title="function: bits(N) BFDotAdd(bits(N) addend, bits(N DIV 2) op1_a, bits(N DIV 2) op1_b, bits(N DIV 2) op2_a, bits(N DIV 2) op2_b, FPCRType fpcr_in)">BFDotAdd</a>(sum, elt1_a, elt1_b, elt2_a, elt2_b, FPCR[]);
    <a href="shared_pseudocode.html#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, e, 32] = sum;

<a href="shared_pseudocode.html#impl-aarch64.V.write.2" title="accessor: V[integer n, integer width] = bits(width) value">V</a>[d, datasize] = result;</p>
    </div>
  <hr/><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><p class="versions">
      Internal version only: isa v33.62, AdvSIMD v29.12, pseudocode v2023-03_rel, sve v2023-03_rc3b
      ; Build timestamp: 2023-03-31T11:36
    </p><p class="copyconf">
      Copyright © 2010-2023 Arm Limited or its affiliates. All rights reserved.
      This document is Non-Confidential.
    </p></body></html>
